
library(support.CEs)
library(tidyverse)
library(foreign)
library(survival)
library(modelsummary)
library(cjoint)
library(ggplot2)
library(data.table)
library(readstata13)
#plot theme
plot_theme =  theme_bw()+ theme(text = element_text(size=10), 
                                panel.grid.major = element_blank(),
                                panel.grid.minor = element_blank(),
                                panel.grid.major.y = element_line(colour = "#CCCCCC"),
                                axis.line = element_line(colour = "black"),
                                strip.background=element_rect(colour=NA, fill="#DDDDDD"),
                                legend.title=element_blank(), 
                                legend.position="top",
                                axis.title=element_text(size=14,face="bold"), 
                                axis.text=element_text(size=12),
                                strip.text.x = element_text(size = 12),
                                legend.text=element_text(size=12), 
                                axis.text.x = element_text(angle = 45, hjust = 1, vjust=1)) 
#plot features
pd<-position_dodge(.2)
pd_text<-position_dodge(1)
#number of runs for bootstrap 
runs = 2500

results_data <- read.dta13("lucid_formatted.dta")

results_data<-results_data %>% 
  select(id, profile_chosen,mark_up, country, rating, task_num, ethnocentrism) %>% 
  rename(respondent_id = id)

#format
results_data<- results_data %>% 
  mutate(country = factor(country, levels=c("United States", "India", "China","Canada","Japan"))) %>%
  mutate(mark_up_cat = case_when(
    mark_up == "1" ~ "100", 
    mark_up == "0.25" ~ "25", 
    mark_up == "0.5" ~ "50",
    mark_up == "0" ~ "0")) %>% 
  mutate(mark_up_cat =factor(mark_up_cat, levels=c("0", "25", "50", "100"))) %>% 
  mutate(rating_cat = factor(rating, levels =c("5 out of 5 stars", "4 out of 5 stars", "3 out of 5 stars")))


with(results_data, summary(country))
with(results_data, summary(mark_up_cat))

##ethno_bins (new ethnocentrism measure)
with(results_data, summary(ethnocentrism))

results_data <- results_data %>%
  mutate(ethno_factor = case_when(
    ethnocentrism <= .4375 ~ "Low ethnocentrism", 
    ethnocentrism > .4375 & ethnocentrism < .6875  ~ "Medium ethnocentrism", 
    ethnocentrism >= .6875  ~ "High ethnocentrism", 
  )) %>% mutate(ethno_factor = as.factor(ethno_factor))

with(results_data, table(ethno_factor))

#################
# marginal means
################

library("cregg")

#specify our cjoint design 
c_design <- profile_chosen ~ mark_up_cat + rating_cat + country 

# marginal means
mm_full <- cj(results_data, c_design, id = ~respondent_id, estimate = "mm")
mm_full$BY = "Full sample"

### marginal means  -- by ethno
mm_ethno_bins <- cj(results_data, c_design, id = ~respondent_id, by = ~ ethno_factor, estimate = "mm")

mm_full_plot  <- mm_full %>% filter(feature == "country") %>% 
  select(BY, estimate, lower, upper, statistic, level ) 
mm_ethno_bins_plot  <- mm_ethno_bins %>% filter(feature == "country" & BY != "Medium ethnocentrism") %>% 
  select(BY, estimate, lower, upper, statistic, level ) 


data_for_cjoint <- results_data %>%  
  drop_na() %>%
  mutate(country = relevel(country, ref="United States")) %>%
  mutate(rating_cat = relevel(rating_cat, ref="5 out of 5 stars"))
summary(data_for_cjoint)

with(results_data, table(ethno_factor))

set.seed(1234)
results_vector = list()
estimates_results = list()
resp_ids <- unique(data_for_cjoint$respondent_id)

for (run in 1:runs) {

  samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
  
  bootSample <- as.data.table(data_for_cjoint)
  setkey(bootSample, "respondent_id")
  bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
  
  #store complete results
  results_vector[[run]] <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country, data=bootSample, respondent.id = "respondent_id")
  
  #store estimates 
  country <-  as.tibble(t(results_vector[[run]]$estimates$country), rownames="est")
  markup <- as.tibble(t(results_vector[[run]]$estimates$markupcat), rownames="est")
  #store the estimates
  estimates_results[[run]] <- bind_rows(country, markup) %>% mutate(run = run)
  
}


full_sample_results <- bind_rows(estimates_results) 

full_sample_markups <- full_sample_results %>% filter(grepl('markup', est)) %>% 
  select(est, AMCE, run) %>%
  rename(markup_est = AMCE, markup = est)


full_sample_results_summary_disag <-  full_sample_results %>% 
  filter(grepl('country', est)) %>%  
  left_join(full_sample_markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = AMCE/(markup_est/as.numeric(markup_num))) %>%
  group_by(est, markup_num) %>%
  summarise(mean_amce = mean(AMCE), 
            lower_amce =quantile(AMCE, probs=c(.025)), 
            upper_amce = quantile(AMCE, probs=c(.975)), 
            mean_mwtp = mean(mwtp), 
            lower_mwtp =quantile(mwtp, probs=c(.025)), 
            upper_mwtp = quantile(mwtp, probs=c(.975))) %>%
  mutate(ethno_factor = "Full sample") %>%
  rename(main = est)


amce_full_sample_results_disag <- full_sample_results_summary_disag %>% select(main, mean_amce, lower_amce, upper_amce, ethno_factor, markup_num) %>%
  rename(mean = mean_amce, upper = upper_amce, lower=lower_amce) %>% mutate(analysis = "AMCE")

mwtp_full_sample_results_disag <- full_sample_results_summary_disag %>% select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor, markup_num ) %>%
  rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")




full_sample_results_summary <-  full_sample_results %>% filter(grepl('country', est)) %>%  left_join(full_sample_markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = AMCE/(markup_est/as.numeric(markup_num))) %>%
  group_by(est) %>%
  summarise(mean_amce = mean(AMCE), 
            lower_amce =quantile(AMCE, probs=c(.025)), 
            upper_amce = quantile(AMCE, probs=c(.975)), 
            mean_mwtp = mean(mwtp), 
            lower_mwtp =quantile(mwtp, probs=c(.025)), 
            upper_mwtp = quantile(mwtp, probs=c(.975))) %>%
  mutate(ethno_factor = "Full sample") %>%
  rename(main = est)

amce_full_sample_results <- full_sample_results_summary %>% select(main, mean_amce, lower_amce, upper_amce, ethno_factor) %>%
  rename(mean = mean_amce, upper = upper_amce, lower=lower_amce) %>% mutate(analysis = "AMCE")

mwtp_full_sample_results <- full_sample_results_summary %>% select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor ) %>%
  rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")




###non-parametric bootstrap 

results_vector = list()
estimates_results = list()
resp_ids <- unique(data_for_cjoint$respondent_id)
set.seed(1234)

for (run in 1:runs) {

  samp <- sample(x = resp_ids, size = length(resp_ids), replace = TRUE)
  bootSample <- as.data.table(data_for_cjoint)
  setkey(bootSample, "respondent_id")
  bootSample <- bootSample[J(samp), allow.cartesian = TRUE]
  
  #store complete results
  results_vector[[run]] <- cjoint::amce(profile_chosen ~ mark_up_cat + rating_cat + country*ethno_factor, data=bootSample,  respondent.varying = "ethno_factor", respondent.id = "respondent_id")
  
  #store estimates 
  country_ethno <-  as.tibble(t(results_vector[[run]]$cond.estimates$`country:ethnofactor`), rownames="est")
  country <-  as.tibble(t(results_vector[[run]]$cond.estimates$country), rownames="est")
  ethno <- as.tibble(t(results_vector[[run]]$cond.estimates$ethnofactor), rownames="est")
  markup <- as.tibble(t(results_vector[[run]]$estimates$markupcat), rownames="est")
  estimates_results[[run]] <- bind_rows(country_ethno, country, ethno, markup) %>% mutate(run = run)
  
}


results <- bind_rows(estimates_results) %>% separate(est, sep=":", c("main", "conditional") )

markups <- results %>% filter(grepl('markup', main)) %>% 
  select(main, AMCE, run) %>%
  rename(markup_est = AMCE, markup = main)

high_ethno <- results %>% filter(grepl('country', main) & is.na(conditional)) %>% 
  mutate(ethno_factor = "High ethnocentrism") %>%
  select(`Conditional Estimate`, ethno_factor, run, main) %>%
  rename(estimate = `Conditional Estimate` )

low_ethno <- results %>% 
  filter(grepl('country', main) & (is.na(conditional) | conditional == "ethnofactorLowethnocentrism") ) %>%
  mutate(ethno_factor = "Low ethnocentrism") %>%
  group_by(run, main, ethno_factor) %>%
  summarize( estimate =  sum (`Conditional Estimate`))  %>%
  ungroup()
  


full_results_disag <- bind_rows(high_ethno, low_ethno) %>% 
  left_join(markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) %>%
  group_by(main, ethno_factor, markup_num) %>%
  summarise(mean_amce = mean(estimate), 
            lower_amce =quantile(estimate, probs=c(.025)), 
            upper_amce = quantile(estimate, probs=c(.975)), 
            mean_mwtp = mean(mwtp), 
            lower_mwtp =quantile(mwtp, probs=c(.025)), 
            upper_mwtp = quantile(mwtp, probs=c(.975)))



amce_results_disag <- full_results_disag %>% select(main, mean_amce, lower_amce, upper_amce, ethno_factor, markup_num) %>%
  rename(mean = mean_amce, upper = upper_amce, lower=lower_amce) %>% mutate(analysis = "AMCE")

mwtp_results_disag <- full_results_disag %>% select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor, markup_num) %>%
  rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")


full_results <- bind_rows(high_ethno, low_ethno) %>% 
  left_join(markups, by=c("run")) %>%
  mutate(markup_num = str_replace(markup, "markupcat", "")) %>%
  mutate(mwtp = estimate/(markup_est/as.numeric(markup_num))) %>%
  group_by(main, ethno_factor) %>%
  summarise(mean_amce = mean(estimate), 
            lower_amce =quantile(estimate, probs=c(.025)), 
            upper_amce = quantile(estimate, probs=c(.975)), 
            mean_mwtp = mean(mwtp), 
            lower_mwtp =quantile(mwtp, probs=c(.025)), 
            upper_mwtp = quantile(mwtp, probs=c(.975)))


amce_results <- full_results %>% select(main, mean_amce, lower_amce, upper_amce, ethno_factor) %>%
  rename(mean = mean_amce, upper = upper_amce, lower=lower_amce) %>% mutate(analysis = "AMCE")

mwtp_results <- full_results %>% select(main, mean_mwtp, lower_mwtp, upper_mwtp, ethno_factor ) %>%
  rename(mean = mean_mwtp, upper = upper_mwtp, lower=lower_mwtp) %>% mutate(analysis = "MWTP")

mwtp_results

#combine the cjoint results with the cregg results
plot_results <- bind_rows(amce_results, mwtp_results, amce_full_sample_results, mwtp_full_sample_results) %>%
  rename(estimate = mean, BY = ethno_factor, statistic = analysis, level = main)



plot_results_disag <- bind_rows(amce_full_sample_results_disag, mwtp_full_sample_results_disag, amce_results_disag, mwtp_results_disag) %>%
  rename(estimate = mean, BY = ethno_factor, statistic = analysis, level = main)



#analysis labels

analysis_labels <- c("Marginal mean\n(relative to all other countries)", "AMCE\n(relative to United States)", "Implied MWTP\n(relative to United States)")

for_plot <- bind_rows(plot_results, mm_full_plot, mm_ethno_bins_plot ) %>%
  mutate(level = str_remove(level, "^country")) %>%
  mutate(analysis_label = case_when(
           statistic == "AMCE" ~ analysis_labels[2], 
           statistic == "MWTP" ~analysis_labels[3],
           statistic == "mm" ~ analysis_labels[1], 
           TRUE ~ as.character(statistic))) %>% 
  mutate(analysis_label = factor(analysis_label, levels = analysis_labels )) %>%
  mutate(BY = case_when(
    BY == "Full sample" ~ "Sample-wide average", 
    TRUE ~ BY )) %>%
  mutate(BY = factor(BY, levels=c("Sample-wide average", "Low ethnocentrism", "High ethnocentrism"))) %>%
  mutate(level = factor(level, levels = c("Canada","Japan","China","India","United States"))) 
  
  

pd_combined<-position_dodge(.75)
combined_plot_short_article <- ggplot(data=for_plot, aes(y=estimate, x=level, group=BY)) +
  geom_errorbar(aes(ymin=lower, ymax=upper), width=0, position=pd_combined)+
  geom_point(aes(fill=BY, shape=BY),color="black",size=4,position=pd_combined) + 
  xlab("Country") +
  ylab("Estimate") +
  scale_shape_manual(values=c(22,21, 23, 24))+
  facet_wrap(analysis_label~., scales="free") + plot_theme
combined_plot_short_article


ggsave("FigureA6.png", combined_plot_short_article, dpi=600, width=14, height=7)







          